from lxml import html


def parse(path='./static/index.html'):
    """Extract content from an HTML file with XPath and print it.

    The file is read as UTF-8 text, parsed with ``html.fromstring`` and a
    series of XPath queries is evaluated against the resulting tree. Every
    result is written to standard output; nothing is returned.

    Args:
        path: Location of the HTML file to read. Defaults to
            ``./static/index.html``.

    Raises:
        OSError: If the file cannot be opened or read.
        IndexError: If a query whose first result is printed matches
            nothing in the document.
    """
    # Read the whole document up front; the context manager closes the file
    # even if one of the lookups below raises.
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    selector = html.fromstring(content)

    # Text of the first <h3> directly under <body>.
    h3 = selector.xpath('/html/body/h3/text()')
    print(h3[0])

    # Every <li> that is a direct child of any <ul>: count, then first text
    # node of each item.
    items = selector.xpath('//ul/li')
    print(len(items))
    for li in items:
        print(li.xpath('text()')[0])

    # Only the <li> elements whose class attribute is exactly "important".
    important = selector.xpath('/html/body/ul/li[@class="important"]')
    print(important[0].xpath('text()')[0])

    # First <a> inside the div with id="container": its text and its href.
    links = selector.xpath('//div[@id="container"]/a')
    print(links[0].xpath('text()')[0])
    print(links[0].xpath('@href')[0])

    # All <p> elements: print each one's list of text nodes.
    paragraphs = selector.xpath("//p")
    for paragraph in paragraphs:
        print(paragraph.xpath('text()'))

    # Last <p> in document order, selected on the Python side.
    print(paragraphs[-1].xpath('text()')[0])

    # Last <p> directly under <body>, selected with XPath's last().
    last_p = selector.xpath("/html/body/p[last()]")
    print(last_p[0].xpath('text()')[0])

    # Text of the third <li> (XPath positions are 1-based).
    third_li_text = selector.xpath('/html/body/ul/li[3]/text()')
    print(third_li_text[0])


# Call parse() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    parse()
